######################################
# Media Measurement Matters          #
# Replication Code                   #
# Appendix P: Eady et al. Scores     #
######################################

# This file contains code for replicating the figures and analyses in Appendix P.
# That appendix replicates the results in the main manuscript using domain-level
# alignment scores from Eady et al. (2019) instead of Bakshy et al. (2015).

# Set-Up ----

# Optional output directories for saved plots and tables. Both start out as
# NULL; replace with a directory path to redirect output. `plot_path` is handed
# to ggsave(path = ) below; `table_path` is not referenced in the code visible
# in this section.
plot_path <- table_path <- NULL

# Load packages
library(tidyverse)
library(ggridges)
library(ggpubr)
library(estimatr)
library(overlap)
library(gtools)

# Set up helper operations
# `%notin%` is the element-wise negation of `%in%`: TRUE wherever the left-hand
# value is absent from the right-hand set.
`%notin%` <- Negate(`%in%`)

# Set up colors
# Hex colour constants referred to by name in the plotting code below (and
# possibly inside the sourced helper functions -- confirm there).
red_mit <- "#A31F34"
red_light <- "#A9606C"
blue_mit <- "#315485"
grey_light <- "#C2C0BF"
grey_dark <- "#8A8B8C"
black <- "#353132"

# Source helper functions
# The plotting/analysis helpers called below (score_plots(), score_overlap(),
# visit_plots(), visit_overlap(), gen_ranges(), group_vsent_plot(),
# vsent_plot(), plot_labels()) are not defined in this file; presumably they
# come from this script -- confirm. The path is relative to the working
# directory.
source("helper_functions.R")

# > Read in data ----

# Read in survey data
# `srvy` is extended further down with binned score columns and is filtered on
# `forcedchoice`; it is expected to carry a `score_eady` column.
srvy <- read_rds("data/survey_data_cleaned.rds")

# Read in web data
# `web` is passed to the visit-level helpers and is summarised by
# `domain_recode` (column `zeta`) to obtain the exemplar-site cut points.
web <- read_rds("data/web_use.rds")

# Descriptive results ----

# > Respondent-level ----

# Figure P1: how respondent-level alignment scores (Eady et al. version, column
# `score_eady`) are distributed within groups defined by (a) partisanship,
# (b) ideology, and (c) stated media preference. Each panel is written to its
# own PDF under `plot_path`.

# All three panels share the same x-axis title
p1_x_label <- "Respondent Avg. Slant Score\n(Eady et al. Scores)"

# Panel (a): party ID
p1_a <- score_plots(
  var = "pid",
  var_labels = c("Democrats", "Independents", "Republicans"),
  var_levels = c(-1, 0, 1),
  x_var = "score_eady",
  x_label = p1_x_label,
  y_label = "Party ID",
  by = 0.75
)
ggsave(
  filename = "fig_p1_a.pdf", plot = p1_a, path = plot_path,
  height = 3, width = 4.5, dpi = 600
)

# Panel (b): ideology
p1_b <- score_plots(
  var = "ideo",
  var_labels = c("Liberals", "Moderates", "Conservatives"),
  var_levels = c(-1, 0, 1),
  x_var = "score_eady",
  x_label = p1_x_label,
  y_label = "Ideology",
  by = 0.75
)
ggsave(
  filename = "fig_p1_b.pdf", plot = p1_b, path = plot_path,
  height = 3, width = 4.5, dpi = 600
)

# Panel (c): stated media preference
p1_c <- score_plots(
  var = "med_pref",
  var_labels = c("Prefer\nMSNBC", "Prefer\nEntertainment", "Prefer\nFox"),
  var_levels = c("MSNBC", "Entertainment", "Fox"),
  x_var = "score_eady",
  x_label = p1_x_label,
  y_label = "Stated Media Preference",
  by = 0.75
)
ggsave(
  filename = "fig_p1_c.pdf", plot = p1_c, path = plot_path,
  height = 3, width = 4.5, dpi = 600
)

# Overlapping coefficient of the respondent-level score distributions across
# media-preference groups; the MSNBC vs. Fox contrast is kept and displayed
overlap_eady_medpref <- score_overlap(
  var = "score_eady",
  group_var = "med_pref",
  values = c("MSNBC", "Entertainment", "Fox"),
  out_contrasts = c(
    "MSNBC vs. Fox",
    "MSNBC vs. Entertainment",
    "Fox vs. Entertainment"
  )
)
eady_resp_overlap <- overlap_eady_medpref$`MSNBC vs. Fox`
eady_resp_overlap

# > Site-level ----

# Figure P2: distribution of visit-level alignment scores by stated media
# preferences, using the Eady et al. scores. Five exemplar domains are passed
# to the helper via `exemplars`.
# NOTE(review): `bma = FALSE` presumably switches the helper from the Bakshy et
# al. scores to the Eady et al. scores -- confirm in helper_functions.R.
p2 <- visit_plots(
  df = web,
  var = "med_pref",
  var_labels = c("Prefer\nMSNBC", "Prefer\nEntertainment", "Prefer\nFox"),
  var_levels = c("MSNBC", "Entertainment", "Fox"),
  x_label = "Relative Slant of News Visits (Eady et al. Scores)",
  y_label = "Stated Media Preference",
  bma = FALSE, # spelled out: `F` is an ordinary variable that can be reassigned
  exemplars = c(
    "cnn.com",
    "news.yahoo.com", "msnbc.com",
    "foxnews.com", "breitbart.com"
  ),
  by = 0.5
)

ggsave(p2,
  path = plot_path, filename = "fig_p2.pdf",
  height = 4, width = 8, dpi = 600
)

# Overlapping coefficient of the visit-level distributions across
# media-preference groups; the MSNBC vs. Fox contrast is kept and displayed
visit_overlap_eady_medpref <- visit_overlap(
  df = web,
  bma = FALSE,
  group_var = "med_pref",
  values = c("MSNBC", "Entertainment", "Fox"),
  out_contrasts = c(
    "MSNBC vs. Fox",
    "MSNBC vs. Entertainment",
    "Fox vs. Entertainment"
  )
)

(eady_visit_overlap <- visit_overlap_eady_medpref$`MSNBC vs. Fox`)

# Persuasion results ----

# Set theme for plotting
# Installs a session-wide default ggplot2 theme: theme_bw() with the legend at
# the bottom, centred bold titles, and explicit text sizes. Plots built after
# this call pick it up unless they override individual elements.
theme_set(
  theme_bw() +
    theme(
      legend.position = "bottom",
      plot.title = element_text(hjust = 0.5, face = "bold", size = 16),
      plot.subtitle = element_text(hjust = 0.5, face = "italic", size = 12),
      # Axis-title spacing is built with margin(), the constructor ggplot2
      # documents for element_text(margin = ), rather than a bare unit vector.
      # Same values as before: 3 mm on top of the x title, 3 mm to the right of
      # the y title, 0 elsewhere.
      axis.title.x = element_text(
        margin = margin(t = 3, unit = "mm"),
        face = "bold", size = 12, angle = 0, hjust = 0.5
      ),
      axis.title.y = element_text(
        margin = margin(r = 3, unit = "mm"),
        face = "bold", size = 12
      ),
      legend.title = element_text(face = "bold", hjust = 0.5, size = 12),
      legend.text = element_text(hjust = 0.5, size = 10),
      axis.text.y = element_text(size = 10, color = "black"),
      axis.text.x = element_text(size = 10, color = "black"),
      legend.box = "vertical",
      legend.background = element_blank(),
      legend.box.background = element_rect(colour = "black"),
      # `black` here is the hex colour variable from the set-up section, not the
      # colour name "black"
      text = element_text(colour = black, size = 15)
    )
)

# > Categorize respondents into groups ----

# Identify alignment scores for the two exemplar sites used as cut points:
# cnn.com and news.yahoo.com. `zeta` is averaged within each domain.
align_scores <- web %>%
  group_by(domain_recode) %>%
  summarise(zeta = mean(zeta))

cnn <- align_scores %>%
  filter(domain_recode == "cnn.com") %>%
  pull(zeta)
yahoo <- align_scores %>%
  filter(domain_recode == "news.yahoo.com") %>%
  pull(zeta)

# Fail loudly if a cut point is unusable. A missing domain would otherwise
# surface as an obscure size error inside case_when(), and an NA cut point would
# silently send every respondent with a score into bin 3.
if (length(cnn) != 1L || is.na(cnn)) {
  stop("Expected exactly one non-missing alignment score for cnn.com in `web`.",
    call. = FALSE
  )
}
if (length(yahoo) != 1L || is.na(yahoo)) {
  stop(
    "Expected exactly one non-missing alignment score for news.yahoo.com in ",
    "`web`.",
    call. = FALSE
  )
}

# Classify respondents into groups based on their average alignment score,
# including portal sites
srvy <- srvy %>%
  mutate(
    # Exemplar sites: 1 = score below CNN's; 2 = not below CNN's but below
    # Yahoo's; 3 = every other non-missing score. Missing scores stay NA.
    score_code_eady = case_when(
      is.na(score_eady) ~ NA_real_,
      score_eady < cnn ~ 1,
      score_eady < yahoo ~ 2,
      TRUE ~ 3
    ),
    # Terciles
    score_eady_bin3 = ntile(score_eady, 3)
  )

# Show how many rows with forcedchoice == 1 fall into each exemplar-site bin
srvy %>%
  filter(forcedchoice == 1) %>%
  pull(score_code_eady) %>%
  table()

# > Exemplar sites ----

# Generate labels
# gen_ranges() supplies one string per exemplar-site bin (see helper file for
# the exact format); each is appended to a descriptive prefix.
eady_code_labels <- gen_ranges(
  bin_var = "score_code_eady", parentheses = TRUE,
  score_version = "score_eady"
)
eady_code_labels <- paste0(
  c(
    "More Liberal\nThan CNN\n",
    "Between CNN\nand Yahoo!\n",
    "More Conserv.\nThan Yahoo!\n"
  ),
  eady_code_labels
)

# Figure P3: relative slant results using Eady et al. scores (based on exemplar
# sites)
score_vsent_code <- group_vsent_plot(
  var = "score_code_eady", nbins = 3,
  labels = eady_code_labels, weights = FALSE
)

# Only the two "vs. Entertainment" contrasts are plotted; the Fox vs. MSNBC
# rows and any rows containing NA are dropped first.
p3_comparisons <- c("Fox vs.\nEntertainment", "MSNBC vs.\nEntertainment")
p3_data <- score_vsent_code %>%
  filter(id != "Fox vs.\nMSNBC") %>%
  na.omit()

p3 <- ggplot(
  p3_data,
  aes(
    x = factor(val),
    col = factor(id, levels = p3_comparisons),
    shape = factor(id, levels = p3_comparisons)
  )
) +
  # A white line under the dashed one masks the background grid line at zero
  geom_hline(yintercept = 0, colour = "white") +
  geom_hline(yintercept = 0, linetype = "dashed", colour = grey_dark) +
  # Thick bars use the *90 interval columns, thin bars the wider-named ones
  geom_errorbar(aes(ymin = min_cilo90, ymax = max_cihi90),
    width = 0, lwd = 1, position = position_dodge(width = 0.5)
  ) +
  geom_errorbar(aes(ymin = min_cilo, ymax = max_cihi),
    width = 0, position = position_dodge(width = 0.5)
  ) +
  geom_point(aes(y = naive),
    position = position_dodge(width = 0.5),
    size = 2
  ) +
  facet_wrap(~outcome, nrow = 1) +
  # Labels come from the unfiltered helper output, in order of first appearance
  scale_x_discrete(labels = unique(score_vsent_code$bin)) +
  xlab("Relative Slant of News Consumption (Binned, Eady et al. Scores)") +
  ylab("Average Treatment Effect of\nPartisan Media vs. Entertainment") +
  scale_y_continuous(
    breaks = seq(-0.2, 0.2, 0.1),
    labels = plot_labels()$att,
    limits = c(-0.225, 0.225),
    sec.axis = dup_axis(
      name = "",
      breaks = seq(-0.2, 0.2, 0.1),
      labels = plot_labels()$share
    )
  ) +
  scale_colour_manual("Comparison", values = c(red_mit, blue_mit)) +
  scale_shape_manual("Comparison", values = c(16, 17, 15)) +
  theme(axis.text.x = element_text(
    size = 10, angle = 0, hjust = 0.5, color = "black"
  ))
p3

ggsave(p3,
  path = plot_path, filename = "fig_p3.pdf",
  width = 9, height = 5.25, dpi = 600
)

# > Terciles ----

# Generate labels
# gen_ranges() supplies one string per tercile bin (see helper file for the
# exact format); each is appended to a descriptive prefix.
eady_labels3 <- gen_ranges(
  bin_var = "score_eady_bin3", score_version = "score_eady",
  parentheses = TRUE
)
eady_labels3 <- paste0(
  c("Most\nLiberal\n", "Moderate\n", "Most\nConservative\n"),
  eady_labels3
)

# Figure P4: relative slant results using Eady et al. scores (based on terciles)
score_vsent_fx <- vsent_plot(
  var = "score_eady", nbins = 3, labels = eady_labels3,
  weights = FALSE
)

# Only the two "vs. Entertainment" contrasts are plotted; the Fox vs. MSNBC
# rows and any rows containing NA are dropped first. The factor levels list
# just those two contrasts so they line up with the two colours supplied to
# scale_colour_manual() (the filtered-out level was never drawn).
p4_comparisons <- c("Fox vs.\nEntertainment", "MSNBC vs.\nEntertainment")
p4_data <- score_vsent_fx %>%
  filter(id != "Fox vs.\nMSNBC") %>%
  na.omit()

p4 <- ggplot(
  p4_data,
  aes(
    x = factor(val),
    col = factor(id, levels = p4_comparisons),
    shape = factor(id, levels = p4_comparisons)
  )
) +
  # A white line under the dashed one masks the background grid line at zero
  geom_hline(yintercept = 0, colour = "white") +
  geom_hline(yintercept = 0, linetype = "dashed", colour = grey_dark) +
  # Thick bars use the *90 interval columns, thin bars the wider-named ones
  geom_errorbar(aes(ymin = min_cilo90, ymax = max_cihi90),
    width = 0, lwd = 1, position = position_dodge(width = 0.5)
  ) +
  geom_errorbar(aes(ymin = min_cilo, ymax = max_cihi),
    width = 0, position = position_dodge(width = 0.5)
  ) +
  geom_point(aes(y = naive),
    position = position_dodge(width = 0.5),
    size = 2
  ) +
  facet_wrap(~outcome, nrow = 1) +
  # Labels come from the unfiltered helper output, in order of first appearance
  scale_x_discrete(labels = unique(score_vsent_fx$bin)) +
  xlab("Relative Slant of News Consumption (Terciles, Eady et al. Scores)") +
  ylab("Average Treatment Effect of\nPartisan Media vs. Entertainment") +
  scale_y_continuous(
    breaks = seq(-0.2, 0.2, 0.1),
    labels = plot_labels()$att,
    limits = c(-0.225, 0.225),
    sec.axis = dup_axis(
      name = "",
      breaks = seq(-0.2, 0.2, 0.1),
      labels = plot_labels()$share
    )
  ) +
  scale_colour_manual("Comparison", values = c(red_mit, blue_mit)) +
  scale_shape_manual("Comparison", values = c(16, 17, 15)) +
  theme(axis.text.x = element_text(
    size = 10, angle = 0, hjust = 0.5, color = "black"
  ))
p4

ggsave(p4,
  path = plot_path, filename = "fig_p4.pdf",
  width = 9, height = 5.25, dpi = 600
)
